{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.866542Z",
     "start_time": "2020-07-13T03:39:27.974663Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "np.set_printoptions(precision=2)\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "from scipy import stats\n",
    "from collections import Counter\n",
    "\n",
    "sns.set_style('ticks')\n",
    "\n",
    "%matplotlib inline\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')\n",
    "import matplotlib as mpl\n",
    "mpl.rcParams['figure.dpi']= 300\n",
    "mpl.rc(\"savefig\", dpi=300)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Read files and select drugs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.873450Z",
     "start_time": "2020-07-13T03:39:28.868851Z"
    }
   },
   "outputs": [],
   "source": [
    "# Column of the drug table used as the per-drug reference dosage. Candidate values:\n",
    "# log2_median_ic50, log2_median_ic50_9f, log2_median_ic50_hn, log2_median_ic50_3f_hn, log2_median_ic50_9f_hn, log2_max_conc\n",
    "ref_type = 'log2_median_ic50_hn' # log2_median_ic50_3f_hn | log2_median_ic50_hn\n",
    "model_name = 'hn_drug_cw_dw10_100000_model' # hn_drug_cw_dw10_100000_model | hn_drug_cw_dw1_100000_model | hn_drug_cw_dwsim10_100000_model\n",
    "\n",
    "# Shift the dosage because the GDSC experiment (Syto60) is less sensitive.\n",
    "# This flag enables the dosage-shift cell further down.\n",
    "dosage_shifted = False"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.892550Z",
     "start_time": "2020-07-13T03:39:28.877205Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(81, 27)\n"
     ]
    }
   ],
   "source": [
    "# Per-drug statistics table; drug IDs (the index) are handled as strings from here on\n",
    "drug_info_df = pd.read_csv('../preprocessed_data/GDSC/hn_drug_stat.csv', index_col=0)\n",
    "drug_info_df.index = drug_info_df.index.astype(str)\n",
    "\n",
    "# Lookup table: drug ID -> drug name\n",
    "drug_id_name_dict = {drug_id: drug_name for drug_id, drug_name in drug_info_df['Drug Name'].items()}\n",
    "print(drug_info_df.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.904917Z",
     "start_time": "2020-07-13T03:39:28.895463Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tested_drug_list = [1032, 1007, 133, 201, 1010, 182, 301, 302]\n",
    "\n",
    "# Sanity check: tested drugs that are absent from the drug table (an empty list means all are present)\n",
    "known_drug_ids = set(drug_info_df.index.astype(int))\n",
    "[d for d in tested_drug_list if d not in known_drug_ids]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Read predicted IC50"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.911159Z",
     "start_time": "2020-07-13T03:39:28.907718Z"
    }
   },
   "outputs": [],
   "source": [
    "# Normalization variant; it selects the result sub-directory used for reading and writing below\n",
    "norm_type = 'cell_TPM'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.946706Z",
     "start_time": "2020-07-13T03:39:28.914024Z"
    }
   },
   "outputs": [],
   "source": [
    "# Result directory for the chosen normalization (also used for the output written later)\n",
    "out_dir = '../result/HN_model/{}/'.format(norm_type)\n",
    "\n",
    "# Predicted values: one row per cell, one column per drug ID\n",
    "pred_path = '../result/HN_model/{}/pred_gdsc_no_bias_{}.csv'.format(norm_type, model_name)\n",
    "cadrres_cell_df = pd.read_csv(pred_path, index_col=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.980117Z",
     "start_time": "2020-07-13T03:39:28.948891Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>1001</th>\n",
       "      <th>1003</th>\n",
       "      <th>1004</th>\n",
       "      <th>1006</th>\n",
       "      <th>1007</th>\n",
       "      <th>1010</th>\n",
       "      <th>1012</th>\n",
       "      <th>1014</th>\n",
       "      <th>1015</th>\n",
       "      <th>1016</th>\n",
       "      <th>...</th>\n",
       "      <th>299</th>\n",
       "      <th>301</th>\n",
       "      <th>302</th>\n",
       "      <th>303</th>\n",
       "      <th>305</th>\n",
       "      <th>306</th>\n",
       "      <th>308</th>\n",
       "      <th>328</th>\n",
       "      <th>331</th>\n",
       "      <th>346</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>RHH2176</th>\n",
       "      <td>10.843385</td>\n",
       "      <td>-6.879123</td>\n",
       "      <td>-5.535641</td>\n",
       "      <td>-0.435868</td>\n",
       "      <td>-5.928303</td>\n",
       "      <td>1.524818</td>\n",
       "      <td>0.550205</td>\n",
       "      <td>1.791061</td>\n",
       "      <td>2.707693</td>\n",
       "      <td>-1.093647</td>\n",
       "      <td>...</td>\n",
       "      <td>0.886537</td>\n",
       "      <td>2.474108</td>\n",
       "      <td>1.464313</td>\n",
       "      <td>2.902817</td>\n",
       "      <td>3.398912</td>\n",
       "      <td>2.633714</td>\n",
       "      <td>-0.767147</td>\n",
       "      <td>-2.297105</td>\n",
       "      <td>1.418391</td>\n",
       "      <td>-4.079475</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2177</th>\n",
       "      <td>10.474557</td>\n",
       "      <td>-7.527169</td>\n",
       "      <td>-6.910859</td>\n",
       "      <td>-0.905919</td>\n",
       "      <td>-7.270812</td>\n",
       "      <td>0.719401</td>\n",
       "      <td>0.155810</td>\n",
       "      <td>0.784177</td>\n",
       "      <td>1.748610</td>\n",
       "      <td>-2.465134</td>\n",
       "      <td>...</td>\n",
       "      <td>1.664287</td>\n",
       "      <td>3.003126</td>\n",
       "      <td>1.630304</td>\n",
       "      <td>3.344067</td>\n",
       "      <td>3.843642</td>\n",
       "      <td>3.096594</td>\n",
       "      <td>-0.734055</td>\n",
       "      <td>-1.758856</td>\n",
       "      <td>1.831446</td>\n",
       "      <td>-3.674684</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2178</th>\n",
       "      <td>10.344092</td>\n",
       "      <td>-8.172785</td>\n",
       "      <td>-7.413486</td>\n",
       "      <td>-1.686217</td>\n",
       "      <td>-8.234032</td>\n",
       "      <td>0.032052</td>\n",
       "      <td>0.287347</td>\n",
       "      <td>-0.055609</td>\n",
       "      <td>1.185569</td>\n",
       "      <td>-2.816860</td>\n",
       "      <td>...</td>\n",
       "      <td>2.969341</td>\n",
       "      <td>3.556121</td>\n",
       "      <td>2.647798</td>\n",
       "      <td>4.025542</td>\n",
       "      <td>4.234615</td>\n",
       "      <td>3.234458</td>\n",
       "      <td>-0.446150</td>\n",
       "      <td>-1.300170</td>\n",
       "      <td>2.665244</td>\n",
       "      <td>-1.997744</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2179</th>\n",
       "      <td>10.523248</td>\n",
       "      <td>-7.165827</td>\n",
       "      <td>-6.176543</td>\n",
       "      <td>-0.431112</td>\n",
       "      <td>-6.661878</td>\n",
       "      <td>1.091142</td>\n",
       "      <td>0.583582</td>\n",
       "      <td>2.332135</td>\n",
       "      <td>3.083348</td>\n",
       "      <td>-1.690173</td>\n",
       "      <td>...</td>\n",
       "      <td>-0.297377</td>\n",
       "      <td>1.935227</td>\n",
       "      <td>0.005749</td>\n",
       "      <td>2.598242</td>\n",
       "      <td>3.179443</td>\n",
       "      <td>2.147339</td>\n",
       "      <td>-1.168008</td>\n",
       "      <td>-3.003178</td>\n",
       "      <td>0.731648</td>\n",
       "      <td>-4.746325</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2180</th>\n",
       "      <td>10.892141</td>\n",
       "      <td>-6.086613</td>\n",
       "      <td>-5.476581</td>\n",
       "      <td>0.044589</td>\n",
       "      <td>-6.668638</td>\n",
       "      <td>0.789689</td>\n",
       "      <td>0.844950</td>\n",
       "      <td>2.550936</td>\n",
       "      <td>3.141692</td>\n",
       "      <td>-1.812517</td>\n",
       "      <td>...</td>\n",
       "      <td>2.212161</td>\n",
       "      <td>3.617715</td>\n",
       "      <td>2.072394</td>\n",
       "      <td>3.981815</td>\n",
       "      <td>4.507488</td>\n",
       "      <td>3.369766</td>\n",
       "      <td>0.010497</td>\n",
       "      <td>0.070235</td>\n",
       "      <td>2.338329</td>\n",
       "      <td>-1.610985</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "              1001      1003      1004      1006      1007      1010  \\\n",
       "RHH2176  10.843385 -6.879123 -5.535641 -0.435868 -5.928303  1.524818   \n",
       "RHH2177  10.474557 -7.527169 -6.910859 -0.905919 -7.270812  0.719401   \n",
       "RHH2178  10.344092 -8.172785 -7.413486 -1.686217 -8.234032  0.032052   \n",
       "RHH2179  10.523248 -7.165827 -6.176543 -0.431112 -6.661878  1.091142   \n",
       "RHH2180  10.892141 -6.086613 -5.476581  0.044589 -6.668638  0.789689   \n",
       "\n",
       "             1012      1014      1015      1016  ...       299       301  \\\n",
       "RHH2176  0.550205  1.791061  2.707693 -1.093647  ...  0.886537  2.474108   \n",
       "RHH2177  0.155810  0.784177  1.748610 -2.465134  ...  1.664287  3.003126   \n",
       "RHH2178  0.287347 -0.055609  1.185569 -2.816860  ...  2.969341  3.556121   \n",
       "RHH2179  0.583582  2.332135  3.083348 -1.690173  ... -0.297377  1.935227   \n",
       "RHH2180  0.844950  2.550936  3.141692 -1.812517  ...  2.212161  3.617715   \n",
       "\n",
       "              302       303       305       306       308       328       331  \\\n",
       "RHH2176  1.464313  2.902817  3.398912  2.633714 -0.767147 -2.297105  1.418391   \n",
       "RHH2177  1.630304  3.344067  3.843642  3.096594 -0.734055 -1.758856  1.831446   \n",
       "RHH2178  2.647798  4.025542  4.234615  3.234458 -0.446150 -1.300170  2.665244   \n",
       "RHH2179  0.005749  2.598242  3.179443  2.147339 -1.168008 -3.003178  0.731648   \n",
       "RHH2180  2.072394  3.981815  4.507488  3.369766  0.010497  0.070235  2.338329   \n",
       "\n",
       "              346  \n",
       "RHH2176 -4.079475  \n",
       "RHH2177 -3.674684  \n",
       "RHH2178 -1.997744  \n",
       "RHH2179 -4.746325  \n",
       "RHH2180 -1.610985  \n",
       "\n",
       "[5 rows x 81 columns]"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first rows of the prediction table\n",
    "cadrres_cell_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.991668Z",
     "start_time": "2020-07-13T03:39:28.983708Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "81 1116\n"
     ]
    }
   ],
   "source": [
    "# Drug IDs come from the drug table, row labels from the prediction table\n",
    "drug_list, cluster_list = drug_info_df.index, cadrres_cell_df.index\n",
    "print(len(drug_list), len(cluster_list))\n",
    "\n",
    "# Put both tables in the same drug order so they can be combined position-wise later\n",
    "cadrres_cell_df = cadrres_cell_df.loc[:, drug_list]\n",
    "drug_info_df = drug_info_df.loc[drug_list, :]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:28.999523Z",
     "start_time": "2020-07-13T03:39:28.995977Z"
    }
   },
   "outputs": [],
   "source": [
    "if dosage_shifted:\n",
    "    # The original note read 'Shift by 4 uM'. The predictions are compared on a log2 scale,\n",
    "    # so subtracting 2 corresponds to a 4-fold lower dosage.\n",
    "    # Fix: the shift previously targeted `cadrres_cluster_df`, which is never defined in this\n",
    "    # notebook and raised NameError; the prediction table here is `cadrres_cell_df`.\n",
    "    # NOTE: re-running this cell applies the shift again.\n",
    "    cadrres_cell_df = cadrres_cell_df - 2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Read cell cluster % in each patient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:29.005429Z",
     "start_time": "2020-07-13T03:39:29.002520Z"
    }
   },
   "outputs": [],
   "source": [
    "# freq_df = pd.read_excel('../preprocessed_data/HN_patient_specific/percent_patient_tpm_cluster.xlsx', index_col=[0, 1]).reset_index()\n",
    "# freq_df = freq_df.pivot(index='patient_id', columns='cluster', values='percent').fillna(0) / 100\n",
    "\n",
    "# patient_list = freq_df.index\n",
    "\n",
    "# freq_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:29.027973Z",
     "start_time": "2020-07-13T03:39:29.008360Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patient_id</th>\n",
       "      <th>cell_line_id</th>\n",
       "      <th>origin</th>\n",
       "      <th>batch</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>RHH2176</th>\n",
       "      <td>HN120</td>\n",
       "      <td>HN120P</td>\n",
       "      <td>Primary</td>\n",
       "      <td>RHH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2177</th>\n",
       "      <td>HN120</td>\n",
       "      <td>HN120P</td>\n",
       "      <td>Primary</td>\n",
       "      <td>RHH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2178</th>\n",
       "      <td>HN120</td>\n",
       "      <td>HN120P</td>\n",
       "      <td>Primary</td>\n",
       "      <td>RHH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2179</th>\n",
       "      <td>HN120</td>\n",
       "      <td>HN120P</td>\n",
       "      <td>Primary</td>\n",
       "      <td>RHH</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2180</th>\n",
       "      <td>HN120</td>\n",
       "      <td>HN120P</td>\n",
       "      <td>Primary</td>\n",
       "      <td>RHH</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        patient_id cell_line_id   origin batch\n",
       "RHH2176      HN120       HN120P  Primary   RHH\n",
       "RHH2177      HN120       HN120P  Primary   RHH\n",
       "RHH2178      HN120       HN120P  Primary   RHH\n",
       "RHH2179      HN120       HN120P  Primary   RHH\n",
       "RHH2180      HN120       HN120P  Primary   RHH"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Cell annotations, restricted to (and ordered like) the rows of the prediction table\n",
    "cell_info_path = '../preprocessed_data/HN_patient_specific/cell_info.csv'\n",
    "cell_info_df = pd.read_csv(cell_info_path, index_col=0).loc[cadrres_cell_df.index]\n",
    "cell_info_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:29.037248Z",
     "start_time": "2020-07-13T03:39:29.031480Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['HN120', 'HN137', 'HN148', 'HN159', 'HN160', 'HN182'], dtype=object)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct patient IDs, in order of first appearance\n",
    "patient_list = pd.unique(cell_info_df['patient_id'])\n",
    "patient_list"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### List all pairs of patient and drug"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:29.060922Z",
     "start_time": "2020-07-13T03:39:29.041161Z"
    },
    "code_folding": []
   },
   "outputs": [],
   "source": [
    "# Predicted value minus the per-drug reference dosage.\n",
    "# The subtraction is positional, so it relies on both tables sharing the order of drug_list.\n",
    "ref_dosage = drug_info_df[ref_type].values\n",
    "pred_delta_df = pd.DataFrame(cadrres_cell_df.values - ref_dosage, columns=drug_list, index=cluster_list)\n",
    "\n",
    "# Logistic curve on a log2 scale: a percentage (presumably cell viability) and its complement, the kill percentage\n",
    "pred_cv_df = 100 / (1 + np.power(2, -pred_delta_df))\n",
    "pred_kill_df = 100 - pred_cv_df\n",
    "\n",
    "# rows = []\n",
    "# for p in patient_list:\n",
    "#     c_list = cell_info_df[cell_info_df['patient_id']==p].index.values\n",
    "\n",
    "#     p_pred_delta_weighted = np.matmul(pred_delta_df.loc[c_list].values.T, freqs)\n",
    "#     p_pred_delta_mat = pred_delta_df.loc[c_list].values\n",
    "    \n",
    "#     p_pred_kill_weighted = np.matmul(pred_kill_df.loc[c_list].values.T, freqs)\n",
    "#     p_pred_kill_mat = pred_kill_df.loc[c_list].values\n",
    "\n",
    "#     for d_i, d_id in enumerate(drug_list):\n",
    "#         rows += [[p, d_id] + [] + [] + \n",
    "#                  ['|'.join([\"{:.14}\".format(f) for f in p_pred_delta_mat[:, d_i]])] + \n",
    "#                  [\"{:.14}\".format(p_pred_delta_weighted[d_i])] +\n",
    "#                  ['|'.join([\"{:.14}\".format(f) for f in p_pred_kill_mat[:, d_i]])] + \n",
    "#                  [\"{:.14}\".format(p_pred_kill_weighted[d_i])]\n",
    "#                 ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:29.096065Z",
     "start_time": "2020-07-13T03:39:29.063067Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>Drug ID</th>\n",
       "      <th>1001</th>\n",
       "      <th>1003</th>\n",
       "      <th>1004</th>\n",
       "      <th>1006</th>\n",
       "      <th>1007</th>\n",
       "      <th>1010</th>\n",
       "      <th>1012</th>\n",
       "      <th>1014</th>\n",
       "      <th>1015</th>\n",
       "      <th>1016</th>\n",
       "      <th>...</th>\n",
       "      <th>299</th>\n",
       "      <th>301</th>\n",
       "      <th>302</th>\n",
       "      <th>303</th>\n",
       "      <th>305</th>\n",
       "      <th>306</th>\n",
       "      <th>308</th>\n",
       "      <th>328</th>\n",
       "      <th>331</th>\n",
       "      <th>346</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>RHH2176</th>\n",
       "      <td>34.834361</td>\n",
       "      <td>37.965723</td>\n",
       "      <td>24.607262</td>\n",
       "      <td>34.784597</td>\n",
       "      <td>6.423581</td>\n",
       "      <td>11.815919</td>\n",
       "      <td>52.785694</td>\n",
       "      <td>33.173945</td>\n",
       "      <td>45.444520</td>\n",
       "      <td>36.329534</td>\n",
       "      <td>...</td>\n",
       "      <td>95.546368</td>\n",
       "      <td>68.674404</td>\n",
       "      <td>75.093315</td>\n",
       "      <td>76.803491</td>\n",
       "      <td>76.546903</td>\n",
       "      <td>60.828523</td>\n",
       "      <td>65.077005</td>\n",
       "      <td>70.974659</td>\n",
       "      <td>74.111578</td>\n",
       "      <td>79.535817</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2177</th>\n",
       "      <td>40.837758</td>\n",
       "      <td>48.954844</td>\n",
       "      <td>45.848571</td>\n",
       "      <td>42.489605</td>\n",
       "      <td>14.826852</td>\n",
       "      <td>18.973903</td>\n",
       "      <td>59.505834</td>\n",
       "      <td>49.939759</td>\n",
       "      <td>61.823409</td>\n",
       "      <td>59.617496</td>\n",
       "      <td>...</td>\n",
       "      <td>92.599915</td>\n",
       "      <td>60.306537</td>\n",
       "      <td>72.879929</td>\n",
       "      <td>70.917850</td>\n",
       "      <td>70.570972</td>\n",
       "      <td>52.978117</td>\n",
       "      <td>64.553926</td>\n",
       "      <td>62.739731</td>\n",
       "      <td>68.253969</td>\n",
       "      <td>74.591670</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2178</th>\n",
       "      <td>43.039377</td>\n",
       "      <td>60.005519</td>\n",
       "      <td>54.536336</td>\n",
       "      <td>55.925896</td>\n",
       "      <td>25.339381</td>\n",
       "      <td>27.383085</td>\n",
       "      <td>57.291212</td>\n",
       "      <td>64.099501</td>\n",
       "      <td>70.523027</td>\n",
       "      <td>65.324885</td>\n",
       "      <td>...</td>\n",
       "      <td>83.509854</td>\n",
       "      <td>50.873591</td>\n",
       "      <td>57.034678</td>\n",
       "      <td>60.325226</td>\n",
       "      <td>64.648760</td>\n",
       "      <td>50.592609</td>\n",
       "      <td>59.867037</td>\n",
       "      <td>55.060877</td>\n",
       "      <td>54.674392</td>\n",
       "      <td>47.865941</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2179</th>\n",
       "      <td>40.024930</td>\n",
       "      <td>42.744688</td>\n",
       "      <td>33.728215</td>\n",
       "      <td>34.709861</td>\n",
       "      <td>10.244698</td>\n",
       "      <td>15.324443</td>\n",
       "      <td>52.208751</td>\n",
       "      <td>25.438272</td>\n",
       "      <td>39.100017</td>\n",
       "      <td>46.316444</td>\n",
       "      <td>...</td>\n",
       "      <td>97.989581</td>\n",
       "      <td>76.105366</td>\n",
       "      <td>89.231365</td>\n",
       "      <td>80.350771</td>\n",
       "      <td>79.167061</td>\n",
       "      <td>68.508485</td>\n",
       "      <td>71.100745</td>\n",
       "      <td>79.956310</td>\n",
       "      <td>82.168266</td>\n",
       "      <td>86.053696</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHH2180</th>\n",
       "      <td>34.071193</td>\n",
       "      <td>26.108648</td>\n",
       "      <td>23.855711</td>\n",
       "      <td>27.656829</td>\n",
       "      <td>10.287865</td>\n",
       "      <td>18.236191</td>\n",
       "      <td>47.682697</td>\n",
       "      <td>22.670096</td>\n",
       "      <td>38.141393</td>\n",
       "      <td>48.430347</td>\n",
       "      <td>...</td>\n",
       "      <td>89.539174</td>\n",
       "      <td>49.806332</td>\n",
       "      <td>66.420851</td>\n",
       "      <td>61.048326</td>\n",
       "      <td>60.216509</td>\n",
       "      <td>48.248655</td>\n",
       "      <td>52.083905</td>\n",
       "      <td>32.152733</td>\n",
       "      <td>60.207613</td>\n",
       "      <td>41.253499</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHO713</th>\n",
       "      <td>27.370033</td>\n",
       "      <td>2.586817</td>\n",
       "      <td>8.484583</td>\n",
       "      <td>4.131592</td>\n",
       "      <td>3.233271</td>\n",
       "      <td>14.181521</td>\n",
       "      <td>26.828476</td>\n",
       "      <td>10.440088</td>\n",
       "      <td>14.493304</td>\n",
       "      <td>28.072442</td>\n",
       "      <td>...</td>\n",
       "      <td>84.720316</td>\n",
       "      <td>47.895021</td>\n",
       "      <td>66.861596</td>\n",
       "      <td>50.583998</td>\n",
       "      <td>56.954617</td>\n",
       "      <td>49.418161</td>\n",
       "      <td>30.020958</td>\n",
       "      <td>15.505470</td>\n",
       "      <td>44.766470</td>\n",
       "      <td>28.071581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHO714</th>\n",
       "      <td>24.220024</td>\n",
       "      <td>6.903385</td>\n",
       "      <td>15.891031</td>\n",
       "      <td>11.129257</td>\n",
       "      <td>7.184816</td>\n",
       "      <td>12.177604</td>\n",
       "      <td>25.885932</td>\n",
       "      <td>15.168345</td>\n",
       "      <td>20.071440</td>\n",
       "      <td>31.852675</td>\n",
       "      <td>...</td>\n",
       "      <td>72.219594</td>\n",
       "      <td>42.884191</td>\n",
       "      <td>45.662666</td>\n",
       "      <td>44.259588</td>\n",
       "      <td>49.208348</td>\n",
       "      <td>40.401124</td>\n",
       "      <td>17.011399</td>\n",
       "      <td>8.830582</td>\n",
       "      <td>40.664800</td>\n",
       "      <td>11.635733</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHO715</th>\n",
       "      <td>34.599963</td>\n",
       "      <td>10.788091</td>\n",
       "      <td>20.674076</td>\n",
       "      <td>12.784234</td>\n",
       "      <td>10.334362</td>\n",
       "      <td>18.199795</td>\n",
       "      <td>40.649312</td>\n",
       "      <td>8.824563</td>\n",
       "      <td>16.815734</td>\n",
       "      <td>37.896669</td>\n",
       "      <td>...</td>\n",
       "      <td>84.658860</td>\n",
       "      <td>49.649983</td>\n",
       "      <td>65.775978</td>\n",
       "      <td>54.869469</td>\n",
       "      <td>57.423419</td>\n",
       "      <td>48.196305</td>\n",
       "      <td>43.092166</td>\n",
       "      <td>22.548129</td>\n",
       "      <td>53.211157</td>\n",
       "      <td>39.548974</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHO716</th>\n",
       "      <td>24.342612</td>\n",
       "      <td>3.670202</td>\n",
       "      <td>9.744860</td>\n",
       "      <td>7.665307</td>\n",
       "      <td>4.845182</td>\n",
       "      <td>12.498439</td>\n",
       "      <td>33.356578</td>\n",
       "      <td>12.401965</td>\n",
       "      <td>17.871554</td>\n",
       "      <td>22.017538</td>\n",
       "      <td>...</td>\n",
       "      <td>52.839648</td>\n",
       "      <td>29.382100</td>\n",
       "      <td>28.379530</td>\n",
       "      <td>39.887497</td>\n",
       "      <td>45.976720</td>\n",
       "      <td>33.761751</td>\n",
       "      <td>32.907347</td>\n",
       "      <td>9.360438</td>\n",
       "      <td>28.987178</td>\n",
       "      <td>21.214643</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>RHO717</th>\n",
       "      <td>30.555916</td>\n",
       "      <td>14.469699</td>\n",
       "      <td>22.001065</td>\n",
       "      <td>16.522492</td>\n",
       "      <td>7.478926</td>\n",
       "      <td>16.689852</td>\n",
       "      <td>31.498662</td>\n",
       "      <td>25.943762</td>\n",
       "      <td>33.296921</td>\n",
       "      <td>55.389722</td>\n",
       "      <td>...</td>\n",
       "      <td>81.032704</td>\n",
       "      <td>43.381476</td>\n",
       "      <td>58.088017</td>\n",
       "      <td>45.271033</td>\n",
       "      <td>53.639893</td>\n",
       "      <td>46.463019</td>\n",
       "      <td>20.398797</td>\n",
       "      <td>10.707974</td>\n",
       "      <td>42.306448</td>\n",
       "      <td>7.536769</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1116 rows × 81 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "Drug ID       1001       1003       1004       1006       1007       1010  \\\n",
       "RHH2176  34.834361  37.965723  24.607262  34.784597   6.423581  11.815919   \n",
       "RHH2177  40.837758  48.954844  45.848571  42.489605  14.826852  18.973903   \n",
       "RHH2178  43.039377  60.005519  54.536336  55.925896  25.339381  27.383085   \n",
       "RHH2179  40.024930  42.744688  33.728215  34.709861  10.244698  15.324443   \n",
       "RHH2180  34.071193  26.108648  23.855711  27.656829  10.287865  18.236191   \n",
       "...            ...        ...        ...        ...        ...        ...   \n",
       "RHO713   27.370033   2.586817   8.484583   4.131592   3.233271  14.181521   \n",
       "RHO714   24.220024   6.903385  15.891031  11.129257   7.184816  12.177604   \n",
       "RHO715   34.599963  10.788091  20.674076  12.784234  10.334362  18.199795   \n",
       "RHO716   24.342612   3.670202   9.744860   7.665307   4.845182  12.498439   \n",
       "RHO717   30.555916  14.469699  22.001065  16.522492   7.478926  16.689852   \n",
       "\n",
       "Drug ID       1012       1014       1015       1016  ...        299  \\\n",
       "RHH2176  52.785694  33.173945  45.444520  36.329534  ...  95.546368   \n",
       "RHH2177  59.505834  49.939759  61.823409  59.617496  ...  92.599915   \n",
       "RHH2178  57.291212  64.099501  70.523027  65.324885  ...  83.509854   \n",
       "RHH2179  52.208751  25.438272  39.100017  46.316444  ...  97.989581   \n",
       "RHH2180  47.682697  22.670096  38.141393  48.430347  ...  89.539174   \n",
       "...            ...        ...        ...        ...  ...        ...   \n",
       "RHO713   26.828476  10.440088  14.493304  28.072442  ...  84.720316   \n",
       "RHO714   25.885932  15.168345  20.071440  31.852675  ...  72.219594   \n",
       "RHO715   40.649312   8.824563  16.815734  37.896669  ...  84.658860   \n",
       "RHO716   33.356578  12.401965  17.871554  22.017538  ...  52.839648   \n",
       "RHO717   31.498662  25.943762  33.296921  55.389722  ...  81.032704   \n",
       "\n",
       "Drug ID        301        302        303        305        306        308  \\\n",
       "RHH2176  68.674404  75.093315  76.803491  76.546903  60.828523  65.077005   \n",
       "RHH2177  60.306537  72.879929  70.917850  70.570972  52.978117  64.553926   \n",
       "RHH2178  50.873591  57.034678  60.325226  64.648760  50.592609  59.867037   \n",
       "RHH2179  76.105366  89.231365  80.350771  79.167061  68.508485  71.100745   \n",
       "RHH2180  49.806332  66.420851  61.048326  60.216509  48.248655  52.083905   \n",
       "...            ...        ...        ...        ...        ...        ...   \n",
       "RHO713   47.895021  66.861596  50.583998  56.954617  49.418161  30.020958   \n",
       "RHO714   42.884191  45.662666  44.259588  49.208348  40.401124  17.011399   \n",
       "RHO715   49.649983  65.775978  54.869469  57.423419  48.196305  43.092166   \n",
       "RHO716   29.382100  28.379530  39.887497  45.976720  33.761751  32.907347   \n",
       "RHO717   43.381476  58.088017  45.271033  53.639893  46.463019  20.398797   \n",
       "\n",
       "Drug ID        328        331        346  \n",
       "RHH2176  70.974659  74.111578  79.535817  \n",
       "RHH2177  62.739731  68.253969  74.591670  \n",
       "RHH2178  55.060877  54.674392  47.865941  \n",
       "RHH2179  79.956310  82.168266  86.053696  \n",
       "RHH2180  32.152733  60.207613  41.253499  \n",
       "...            ...        ...        ...  \n",
       "RHO713   15.505470  44.766470  28.071581  \n",
       "RHO714    8.830582  40.664800  11.635733  \n",
       "RHO715   22.548129  53.211157  39.548974  \n",
       "RHO716    9.360438  28.987178  21.214643  \n",
       "RHO717   10.707974  42.306448   7.536769  \n",
       "\n",
       "[1116 rows x 81 columns]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred_kill_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-13T03:39:29.328604Z",
     "start_time": "2020-07-13T03:39:29.098619Z"
    }
   },
   "outputs": [],
   "source": [
    "# Save the cell-level kill predictions.\n",
    "# Tag dosage-shifted runs in the file name, as the patient-level save at the end\n",
    "# of the notebook does, so a shifted run does not overwrite the unshifted file.\n",
    "# NOTE(review): assumes pred_kill_df changes with dosage_shifted -- confirm.\n",
    "cell_shift_suffix = '_shifted' if dosage_shifted else ''\n",
    "pred_kill_df.to_csv(out_dir + 'pred_drug_kill_{}_{}_cell{}.csv'.format(ref_type, model_name, cell_shift_suffix))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-11T09:06:01.626005Z",
     "start_time": "2020-07-11T09:06:01.608026Z"
    }
   },
   "outputs": [],
   "source": [
    "def _patient_mean_long(cell_level_df, value_name):\n",
    "    '''Average cell-level predictions per patient and return them in long form.\n",
    "\n",
    "    cell_level_df is merged with cell_info_df['patient_id'] on the index,\n",
    "    averaged per patient_id, then stacked into one row per (patient, drug_id)\n",
    "    with the averaged value stored in a column named value_name.\n",
    "    '''\n",
    "    with_patient = pd.merge(cell_level_df, cell_info_df[['patient_id']], left_index=True, right_index=True)\n",
    "    long_df = with_patient.groupby('patient_id').mean().stack().reset_index()\n",
    "    long_df.columns = ['patient', 'drug_id', value_name]\n",
    "    return long_df\n",
    "\n",
    "\n",
    "# The 'pateint' spelling is kept: the following cell reads these exact names.\n",
    "pred_pateint_delta_df = _patient_mean_long(pred_delta_df, 'delta')\n",
    "pred_pateint_kill_df = _patient_mean_long(pred_kill_df, 'kill')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-11T09:06:01.644321Z",
     "start_time": "2020-07-11T09:06:01.628027Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>patient</th>\n",
       "      <th>drug_id</th>\n",
       "      <th>delta</th>\n",
       "      <th>kill</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1001</td>\n",
       "      <td>0.941144</td>\n",
       "      <td>34.518773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1003</td>\n",
       "      <td>1.660548</td>\n",
       "      <td>26.558972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1004</td>\n",
       "      <td>1.438976</td>\n",
       "      <td>28.542245</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1006</td>\n",
       "      <td>1.629793</td>\n",
       "      <td>26.066463</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>HN120</td>\n",
       "      <td>1007</td>\n",
       "      <td>3.234392</td>\n",
       "      <td>10.897336</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  patient drug_id     delta       kill\n",
       "0   HN120    1001  0.941144  34.518773\n",
       "1   HN120    1003  1.660548  26.558972\n",
       "2   HN120    1004  1.438976  28.542245\n",
       "3   HN120    1006  1.629793  26.066463\n",
       "4   HN120    1007  3.234392  10.897336"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Combine the per-patient delta and kill tables on their shared (patient, drug_id) key.\n",
    "merge_keys = ['patient', 'drug_id']\n",
    "single_drug_pred_df = pred_pateint_delta_df.merge(pred_pateint_kill_df, on=merge_keys)\n",
    "single_drug_pred_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-11T09:06:01.652174Z",
     "start_time": "2020-07-11T09:06:01.646267Z"
    }
   },
   "outputs": [],
   "source": [
    "# Look up every row's drug name from its drug_id via drug_id_name_dict.\n",
    "drug_names = [drug_id_name_dict[drug_id] for drug_id in single_drug_pred_df['drug_id'].values]\n",
    "single_drug_pred_df['drug_name'] = drug_names"
   ]
  },
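  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Save results\n",
    "\n",
    "Write the patient-level predictions (`delta`, `kill` and `drug_name` for each patient and drug) to CSV; the file name ends in `_shifted` when `dosage_shifted` is set."
   ]
  },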
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "ExecuteTime": {
     "end_time": "2020-07-11T09:06:01.664216Z",
     "start_time": "2020-07-11T09:06:01.653849Z"
    }
   },
   "outputs": [],
   "source": [
    "# Patient-level predictions; dosage-shifted runs get a '_shifted' suffix in the file name.\n",
    "shift_suffix = '_shifted' if dosage_shifted else ''\n",
    "out_name = 'pred_drug_kill_{}_{}{}.csv'.format(ref_type, model_name, shift_suffix)\n",
    "single_drug_pred_df.to_csv(out_dir + out_name, index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "175.531px"
   },
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
